**********************************************************
* This Stata dofile was written to accompany the paper:
* Amy King & Andrew Leigh, Beautiful Politicians (2009) Kyklos 62(4): 579-593
*
* Please feel free to use or adapt this dofile, so long as you cite that paper.
* There is an additional condition which attaches to the use of this data, which is that 
* you must not create or allow to be created any lists of 'the ugliest politicians'. 
* Specifically, by using this dataset, you agree not to list the names of those who fall towards the bottom of the beauty distribution. 
* You also agree not to enable others to do so, for example by republishing the raw data in a format other than Stata's DTA format.
*
* Use of this data implies acceptance of these conditions. If you do not agree to our conditions, you are quite free to follow our
* methodology by finding a database of politicians' photographs, and paying independent people to rate their beauty.
*
* Questions to andrew_leigh@ksg02.harvard.edu
**********************************************************

* Session setup: pin the Stata version the commands are interpreted under, turn off
* output paging, empty the dataset in memory, set the memory allocation, and move to
* the project folder that the relative file names below are resolved against.
* NOTE(review): later sections cd to a folder under C:\Users\Andrew\ instead - confirm
* which location is current before running.
version 11
set more off
clear
set mem 10m
cd "C:\Documents and Settings\leigha\My publications\Aust - beautiful politicians etc\"

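/*
* NOTE: everything from here down to the closing comment marker just above the
* "Correlations and summary statistics" banner is disabled. It is the data-preparation
* code whose final step saves beauty_merged_for_amy, the dataset the analysis below loads.
* Remove the comment markers only if that file has to be rebuilt from the raw inputs.
* Setting up the Michigan raters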
use beauty_ratings_michigan_students, clear
ren candidate candidatenb
for num 1/9 \ any dustinkolo davidlawrence ryangough sangjinpark kellybalon gbengaolumolade mingxu bharaniprathapkasma michaelweinstein: ren Y rating_us_X
sort candidatenb
save temp_beauty_ratings_michigan_students, replace

* Setting up a file of incumbents
insheet using "C:\Users\Andrew\Datasets\AEC data\2001\hcands.txt", clear delimiter(";")
gen firstname=word(candidate,2)
gen lastname=word(candidate,1)
replace lastname=subinstr(lastname,",","",.)
keep if member=="#"
keep party lastname firstname
sort party lastname firstname
save temp_2004incumbents, replace

* Setting up electorate demographics
use electoratedemographics2001.dta, clear
gen electorate4=substr(electoraldivision,1,4)
replace electorate4="KiSm" if electoraldivision=="Kingsford Smith (NSW)"
replace electorate4="MePo" if electoraldivision=="Melbourne Ports (Vic)"
replace electorate4="BrFi" if electoraldivision=="Bradfield (NSW)"
* For the two electorates that were created in the 2003 redistribution, the demog variables 
* take the mean of the main electorates covering that area in the 2001 election. 
* Thus Bonner is the average for Bowman and Griffith, 
* while Gorton is the average for Burke, Calwell and Maribyrnong.
expand 3 if electorate4=="BrFi"
bysort electorate4: egen seq=seq()
replace electorate4="Bonn" if seq==2
replace electorate4="Gort" if seq==3
drop seq
for any  medianage sharenotfluentinenglish medianfamilyincome shareyr10below sharewomeninlf sharewithtertiaryquals sharewithnoquals sharewithtradequals: egen temp1=mean(X) if electorate4=="Grif" | electorate4=="Bowm" \ egen temp2=max(temp1) \ replace X=temp2 if electorate4=="Bonn" \ egen temp3=mean(X) if electorate4=="Burk" | electorate4=="Calw" | electorate4=="Mari" \ egen temp4=max(temp3) \ replace X=temp4 if electorate4=="Gort" \ drop temp*
sort electorate4
save temp, replace

* Loading elections data
use elections1993_2004.dta, clear

* Merging on informal vote in 2004
sort electorate
merge electorate using informalvote_2004
tab _merge if election==2004
drop _merge

* Merging on electorate characteristics
gen electorate4=substr(electorate,1,4)
replace electorate4="KiSm" if electorate=="Kingsford Smith" | electorate=="Kingsford-Smith"
replace electorate4="MePo" if electorate=="Melbourne Ports"
replace electorate4="BrFi" if electorate=="Bradfield"
sort electorate4
merge electorate4 using temp, update
tab _merge
drop _merge

* Calculating voteshare
bysort election electorate: egen totalvote=sum(primaryvote)
gen voteshare=primaryvote/totalvote
bysort electorate: egen totalcandidates=max(ballotposition)
gen logvoteshare=ln(voteshare)

* Generating sex
gen female=1 if gender=="F"
replace female=0 if gender=="M"

* Generating a candidate ID code
egen candidate=group(firstname lastname),

* Generating party ID
egen partyno=group(party)
egen party_election=group(party election)

* Generating ballot order variables
tab ballotposition, gen(balpos)

keep if election==2004
replace firstname="J N" if lastname=="ZIGOURAS"

* Merging on beauty data
sort lastname firstname
merge lastname firstname using beauty_ratings, 
tab _merge
list if _merge==2
keep if _merge==3
drop _merge
* Giving John Zigouras his first name
replace firstname="John" if lastname=="ZIGOURAS"
* Coding CLP as National
replace party="NP" if party=="CLP"

* Merging on incumbency
replace firstname=word(firstname,1)
replace lastname=upper(lastname)
sort party lastname firstname
merge party lastname firstname using temp_2004incumbents, nokeep
ren _merge incumbent
recode incumbent 3=1 *=0
list firstname lastname if incumbent==0

* Adding the 9 Michigan raters
sort candidatenb
merge candidatenb using temp_beauty_ratings_michigan_students
tab _merge
drop _merge ratingrnewman
ren ratinganewman rating_us_10
save beauty_merged_for_amy, replace
*/

*******************************************
* Correlations and summary statistics
*******************************************
* Load the merged candidate-level file.
use beauty_merged_for_amy, clear

* Correlations in beauty: pairwise correlations of the four main raters' raw scores
pwcorr rating1 rating2 rating3 rating4

* Standardising beauty data: every variable matching rating* is overwritten with its z-score
summarize rating*
foreach score of varlist rating* {
    egen temp = std(`score')
    replace `score' = temp
    drop temp
}

* Composite scores: the main composite is the plain average of the four standardised
* ratings; the US composite is the row mean of the ten rating_us_* variables
* (rowmean uses whichever of them are non-missing).
rename averagebeautyrating averagebeauty
replace averagebeauty = (rating1+rating2+rating3+rating4)/4
egen averagebeauty_us = rowmean(rating_us_1 rating_us_2 rating_us_3 rating_us_4 rating_us_5 rating_us_6 rating_us_7 rating_us_8 rating_us_9 rating_us_10)

* Re-standardizing the sum, so both composites are again in standard-deviation units
foreach composite in averagebeauty averagebeauty_us {
    egen temp = std(`composite')
    replace `composite' = temp
    drop temp
}

* How closely do the two rater panels agree?
pwcorr averagebeauty averagebeauty_us

***************************
* Summary statistics
***************************
* Scratch variables used only in this section: a percentile implied by the normal CDF
* of the standardised score, and a rank where 1 is the highest score.
gen beauty_pct = 100*normal(averagebeauty)
format beauty_pct %2.0f
egen beauty_rank = rank(averagebeauty), field

* Means, standard deviations and counts by sex, incumbency and party:
* first for beauty, then for vote share.
foreach grp in female incumbent party {
    tabstat averagebeauty beauty_pct, by(`grp') statistics(mean sd n)
}
foreach grp in female incumbent party {
    tabstat voteshare, by(`grp') statistics(mean sd n)
}

* Highest-rated man (female==0) and woman (female==1)
forvalues sex = 0/1 {
    egen max_beauty`sex' = max(averagebeauty) if female==`sex'
}
forvalues sex = 0/1 {
    list firstname lastname party electorate averagebeauty voteshare if averagebeauty==max_beauty`sex' & averagebeauty!=.
}

* Selected well-known politicians
list firstname lastname party electorate averagebeauty beauty_pct beauty_rank voteshare if inlist(lastname, "HOWARD", "LATHAM", "BEAZLEY", "RUDD", "GILLARD", "MACKLIN")

* Top ten by beauty score
gsort -averagebeauty
list firstname lastname party electorate beauty_pct beauty_rank in 1/10
drop beauty_pct beauty_rank

***************************
* Regressions
***************************
* All specifications regress vote share on beauty, incumbency and sex with standard
* errors clustered by electorate. The first outreg call starts beauty_results.doc
* afresh (replace); every later call appends a column.

* Pooled OLS, no party fixed effects
regress voteshare averagebeauty incumbent female, robust cluster(electorate)
outreg using beauty_results.doc, coefastr nocons bracket 3aster replace bdec(3) se ct("No Party FE")

* Party fixed effects absorbed
areg voteshare averagebeauty incumbent female, robust absorb(partyno) cluster(electorate)
outreg using beauty_results.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Party FE")

* Beauty interacted with incumbency and with sex
gen b_incumbent = averagebeauty*incumbent
gen b_female = averagebeauty*female

* Party-FE models adding each interaction separately, then both together
foreach inter in "b_incumbent" "b_female" "b_incumbent b_female" {
    areg voteshare averagebeauty `inter' incumbent female, robust absorb(partyno) cluster(electorate)
    outreg using beauty_results.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Party FE")
}

* Both interactions without party fixed effects
regress voteshare averagebeauty b_incumbent b_female incumbent female, robust cluster(electorate)
outreg using beauty_results.doc, coefastr nocons bracket 3aster append bdec(3) se ct("No Party FE")
* Within-electorate comparisons
* beauty_gap is a candidate's score relative to a rival in the same electorate:
*   - anyone who is not the electorate minimum is measured against the minimum;
*   - the minimum itself is measured against the maximum;
*   - if minimum and maximum coincide there is no rival and the gap is missing.
bysort electorate: egen top_beauty = max(averagebeauty)
bysort electorate: egen low_beauty = min(averagebeauty)
gen beauty_gap = cond(averagebeauty!=low_beauty, averagebeauty-low_beauty, cond(averagebeauty!=top_beauty, averagebeauty-top_beauty, .))
drop low_beauty top_beauty

* Baseline model on the sample where the gap is defined, for comparability
areg voteshare averagebeauty incumbent female if beauty_gap!=., robust absorb(partyno) cluster(electorate)
outreg using beauty_results.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Gap Nonmissing - Party FE")

* Gap instead of the level
areg voteshare beauty_gap incumbent female, robust absorb(partyno) cluster(electorate)
outreg using beauty_results.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Gap - Party FE")

* Gap and level together
areg voteshare beauty_gap averagebeauty incumbent female, robust absorb(partyno) cluster(electorate)
outreg using beauty_results.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Gap & Beauty - Party FE")
* Non-linearities
* (a) Beauty terciles entered as dummies, with the middle tercile as the omitted group
xtile beauty = averagebeauty, nquantiles(3)
tabulate beauty
char beauty[omit] 2
*xi: reg voteshare i.beauty incumbent female,r cl(electorate)
xi: areg voteshare i.beauty incumbent female, robust absorb(partyno) cluster(electorate)
outreg using beauty_results.doc, coefastr nocons bracket 3aster append bdec(3) se ct("NonLinearities - Party FE")
* Symmetry test: is the bottom-tercile effect equal and opposite to the top-tercile effect?
test _Ibeauty_1 + _Ibeauty_3 = 0

* (b) Quadratic in beauty
gen averagebeauty2 = averagebeauty^2
*reg voteshare averagebeauty averagebeauty2 incumbent female,r cl(electorate)
areg voteshare averagebeauty averagebeauty2 incumbent female, robust absorb(partyno) cluster(electorate)
outreg using beauty_results.doc, coefastr nocons bracket 3aster append bdec(3) se ct("NonLinearities - Party FE")

* Remove the scratch variables created above
drop averagebeauty2 beauty

***************************
* Robustness checks
***************************
* Each check re-estimates the party-fixed-effects model of vote share on beauty,
* incumbency and sex, with standard errors clustered by electorate, and appends
* a column to beauty_results.doc.

* Separately by rater (the pooled OLS version is displayed only, not exported)
regress voteshare rating1-rating4 incumbent female, robust cluster(electorate)
areg voteshare rating1-rating4 incumbent female, robust absorb(partyno) cluster(electorate)
outreg using beauty_results.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Party FE")
* Joint significance of the four individual ratings
testparm rating1-rating4

* Drop famous candidates (famous is 1 for the listed names and missing for everyone else)
gen famous=1 if lastname=="LATHAM" | lastname=="CREAN" | lastname=="BEAZLEY" | lastname=="HOWARD" | lastname=="COSTELLO" | lastname=="ABBOTT" | lastname=="DOWNER" | (firstname=="Brendan" & lastname=="NELSON")
areg voteshare averagebeauty incumbent female if famous==., absorb(partyno) robust cluster(electorate)
outreg using beauty_results.doc, coefastr nocons bracket 3aster append bdec(3) se ct("No famous candidates, Party FE")

* US raters
* This regression was previously the only one in the file without cluster(electorate).
* It is now clustered like every other specification, including the IV model
* immediately below, so the standard errors in the table are comparable across columns.
areg voteshare averagebeauty_us incumbent female, robust absorb(partyno) cluster(electorate)
outreg using beauty_results.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Party FE")
* Australian ratings instrumented with the US ratings; party dummies via xi
xi: ivreg2 voteshare incumbent female (averagebeauty=averagebeauty_us) i.partyno, first r cl(electorate)
outreg using beauty_results.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Party FE")

* Drop non-Anglo: exclude one ethnicity group at a time, then all four together
foreach grp in asian southerneuropean middleeast otherimmigrant {
    areg voteshare averagebeauty incumbent female if ethnicity`grp'==0, robust absorb(partyno) cluster(electorate)
    outreg using beauty_results.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Party FE, excl `grp'")
}
areg voteshare averagebeauty incumbent female if ethnicityasian==0 & ethnicitysoutherneuropean==0 & ethnicitymiddleeast==0 & ethnicityotherimmigrant==0, robust absorb(partyno) cluster(electorate)
outreg using beauty_results.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Party FE, excl non-Anglo")

* Control for candidate age: linear, then linear plus quadratic
gen age2=age^2
foreach agevars in "age" "age age2" {
    areg voteshare averagebeauty `agevars' incumbent female, robust absorb(partyno) cluster(electorate)
    outreg using beauty_results.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Party FE")
}

***************************
* Productivity or Discrimination?
***************************
* Attach electorate-level voter-engagement measures (apathetic, uninterestp,
* unintereste) from temp_aes_interest and test whether the beauty effect differs
* between more and less engaged electorates.
* temp_aes_interest is saved by the "Political engagement of electorate" section
* at the bottom of this file, so that section has to have been run beforehand.
sort electorate4 
merge electorate4 using temp_aes_interest
*for any knowledge interest_politics interest_election wouldvote carewhowins: egen temp=std(X) \ replace X=temp \ drop temp
*gen interest=(interest_politics+interest_election+knowledge)
*for any interest knowledge interest_politics interest_election wouldvote carewhowins: gen b_X=X*averagebeauty
*for any interest knowledge interest_politics interest_election wouldvote carewhowins: areg voteshare averagebeauty b_X X incumbent female,a(partyno) r cl(electorate) \ outreg using beauty_results.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Party FE")

* Keep the raw measure, standardise the working copy, show the raw means on each
* side of the split, and build the beauty interaction.
* Stata treats a missing value as larger than any number, so an unguarded ">=0"
* would also select rows with no engagement data (candidates whose electorate found
* no match in the merge). The "<." guard keeps those rows out of the high group.
foreach v in apathetic uninterestp unintereste {
    gen `v'_orig=`v'
    egen temp=std(`v')
    replace `v'=temp
    drop temp
    sum `v'_orig if `v'<0
    sum `v'_orig if `v'>=0 & `v'<.
    gen b_`v'=`v'*averagebeauty
}

* For each measure: above-average electorates, below-average electorates, then the
* full sample with the interaction term.
foreach v in apathetic uninterestp unintereste {
    areg voteshare averagebeauty incumbent female if `v'>=0 & `v'<., a(partyno) r cl(electorate)
    outreg using beauty_results.doc, coefastr nocons bracket 3aster append bdec(3) se ct("More `v'")
    areg voteshare averagebeauty incumbent female if `v'<0, a(partyno) r cl(electorate)
    outreg using beauty_results.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Less `v'")
    areg voteshare averagebeauty b_`v' `v' incumbent female, a(partyno) r cl(electorate)
    outreg using beauty_results.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Interact `v'")
}

* STOP is not a Stata command; reaching it raises an error that halts the do-file.
* NOTE(review): presumably a deliberate barrier so the sections below are only run by hand - confirm.
STOP

***************************
* Pretty pictures
***************************
set scheme s1mono
label variable averagebeauty "Beauty rating (in standard deviations)"

* There are some common last names. For Figs 2-5, we need to distinguish people who will appear on the same graph 
* (We don't worry about repetition of names on Figs 6-7 - they're pretty hard to read anyhow.)
* The marker label is the surname, except for the candidates listed below, who get
* "<first initial>.<SURNAME>".
gen name=lastname
replace name=substr(firstname,1,1)+"."+lastname if lastname=="BISHOP" & inlist(firstname,"Julie","Bronwyn")
replace name=substr(firstname,1,1)+"."+lastname if lastname=="FERGUSON" & inlist(firstname,"Martin","Laurie")
replace name=substr(firstname,1,1)+"."+lastname if lastname=="GRAY" & inlist(firstname,"Roger","John")
replace name=substr(firstname,1,1)+"."+lastname if lastname=="KELLY" & inlist(firstname,"De-Anne","Jackie")
replace name=substr(firstname,1,1)+"."+lastname if lastname=="KING" & inlist(firstname,"Bruce","Paul")
replace name=substr(firstname,1,1)+"."+lastname if lastname=="MCCONNELL" & inlist(firstname,"Craig","David")
replace name=substr(firstname,1,1)+"."+lastname if lastname=="O'CONNOR" & inlist(firstname,"Brendan","Gavan")
replace name=substr(firstname,1,1)+"."+lastname if lastname=="SMITH" & inlist(firstname,"Daniel","Greg","Tony","Stephen")
replace name=substr(firstname,1,1)+"."+lastname if lastname=="WILLIAMS" & inlist(firstname,"John","Allan")

* Check for labels still shared by two people who would appear on the same figure
bysort name female incumbent: egen count=count(female)
list firstname lastname if count==2
replace name=proper(name)

* These regressions go in the notes to the tables.
* One bivariate regression per incumbency-by-sex cell.
forvalues inc = 0/1 {
    forvalues fem = 0/1 {
        regress voteshare averagebeauty if incumbent==`inc' & female==`fem', robust
    }
}

* Figures 2-5: labelled scatter plus fitted line for each incumbency-by-sex cell.
* The coefficients quoted in each note are typed in by hand.
twoway (scatter voteshare averagebeauty if incumbent==0 & female==0, mlabel(name)) ///
    (lfit voteshare averagebeauty if incumbent==0 & female==0), ///
    ytitle("Voteshare") title("Figure 2: Male Challengers") legend(off) ///
    note("Voteshare = 0.36 + 0.035 * Beauty (t=4.4)" "N=119")
twoway (scatter voteshare averagebeauty if incumbent==1 & female==0, mlabel(name)) ///
    (lfit voteshare averagebeauty if incumbent==1 & female==0), ///
    ytitle("Voteshare") title("Figure 3: Male Incumbents") legend(off) ///
    note("Voteshare = 0.52 + 0.017 * Beauty (t=2.3)" "N=93")
twoway (scatter voteshare averagebeauty if incumbent==0 & female==1, mlabel(name)) ///
    (lfit voteshare averagebeauty if incumbent==0 & female==1), ///
    ytitle("Voteshare") title("Figure 4: Female Challengers") legend(off) ///
    note("Voteshare = 0.34 + 0.005 * Beauty (t=0.5)" "N=40")
twoway (scatter voteshare averagebeauty if incumbent==1 & female==1, mlabel(name)) ///
    (lfit voteshare averagebeauty if incumbent==1 & female==1), ///
    ytitle("Voteshare") title("Figure 5: Female Incumbents") legend(off) ///
    note("Voteshare = 0.50 + 0.012 * Beauty (t=0.6)" "N=34")

***************************************************
* What have margins looked like from 1996-2004?
***************************************************
* NOTE(review): the file loaded is elections1984_2004_demographics and no year
* restriction is applied here - confirm whether the heading or the sample is intended.
cd "C:\Users\Andrew\My publications\Aust - beautiful politicians etc\"
use elections1984_2004_demographics.dta, clear
summarize cvote

* Margin = absolute distance of cvote from 50
gen margin = abs(cvote-50)

* One flagged row per election-electorate pair, so each contest is counted once
egen tag = tag(election electorate)
summarize margin if tag, detail

* Indicators for contests decided by at most 1, 2, 3 or 4 points
* (1 if the margin lies in the range, 0 otherwise, missing stays missing)
forvalues cut = 1/4 {
    gen margin`cut' = margin
    recode margin`cut' 0/`cut'=1 .=. *=0
}

* Same indicator for a 1.4-point cut-off
gen margin14 = margin
recode margin14 0/1.4=1 .=. *=0

summarize margin1-margin4 margin14 if tag
tabulate margin14 if tag

***************************************************
* Political engagement of electorate
* 1996, 1998 & 2001 AES
***************************************************
* Builds temp_aes_interest: one row per four-character electorate code holding the
* mean of three 0/1 respondent indicators (apathetic, uninterestp, unintereste)
* and n, the number of respondents with a non-missing apathetic value.
clear
set more off
cd "C:\Users\Andrew\My publications\Aust - beautiful politicians etc\"
*use elecdiv b3 b1 using "C:\Users\Andrew\Datasets\Australian Election Studies\AES2004\d1079.dta", clear
*append using "C:\Users\Andrew\Datasets\Australian Election Studies\AES2001\aes2001.dta", keep(b3 elecdiv b1)

* Stack the three survey waves; year is filled in after each append
use a1 a4 b3 b1 elecdiv using "C:\Users\Andrew\Datasets\Australian Election Studies\AES2001\aes2001.dta", clear
gen year=2001
append using "C:\Users\Andrew\Datasets\Australian Election Studies\AES1998\aes1998.dta", keep(a1 a4 b3 elecdiv b1)
replace year=1998 if year==.
append using "C:\Users\Andrew\Datasets\Australian Election Studies\AES1996\aes1996.dta", keep(a1 a4 b4 elect b1)
replace year=1996 if year==.

* The 1996 file supplies the item as b4 and the electorate as elect; fold both into
* the names used by the other waves.
* NOTE(review): elecdiv is decoded below with a single value label - confirm the 1996
* elect codes map to the same electorates as the elecdiv codes.
replace b3=b4 if b4~=.
replace elecdiv=elect if elect~=.

* Respondent-level 0/1 indicators. Any code outside the listed ranges is set to
* missing for all three items, so stray codes cannot leak into the electorate means
* (previously only the apathetic recode did this).
ren a1 uninterestp
ren a4 unintereste
foreach v of varlist uninterest* {
    recode `v' 1/3=0 4=1 *=.
}
ren b3 apathetic 
recode apathetic 1=0 2=0 3=1 *=.

* Four-character electorate key. Three electorates get hand-made codes; the match is
* case-insensitive and accepts Kingsford Smith with or without the hyphen.
decode elecdiv, generate(temp) 
gen electorate4=substr(proper(temp),1,4)
replace electorate4="KiSm" if inlist(lower(temp),"kingsford-smith","kingsford smith")
replace electorate4="MePo" if lower(temp)=="melbourne ports"
replace electorate4="BrFi" if lower(temp)=="bradfield"
* proper() lower-cases the letter after "Mc"; restore it so the key matches the
* capitalisation of a name written as "McEwen", "McMillan", "McPherson".
replace electorate4="McEw" if electorate4=="Mcew"
replace electorate4="McMi" if electorate4=="Mcmi"
replace electorate4="McPh" if electorate4=="Mcph"
codebook electorate4
* Testing correlation in apathy across surveys
*for num 1996 1998 2001: gen apatheticX=apathetic if year==X
*collapse apathetic1996 apathetic1998 apathetic2001,by(electorate4)
*pwcorr apathetic*

* Electorate means of the three indicators, plus the respondent count
collapse apathetic uninterest* (count) n=apathetic,by(electorate4)

* Bonner and Gorton: make two extra rows by cloning Bradfield, then overwrite the
* cloned values. Following the convention the demographics preparation describes,
* Bonner takes the mean of Bowman and Griffith and Gorton takes the mean of Burke,
* Calwell and Maribyrnong. Previously the clones kept Bradfield's values.
* If none of the source electorates is present the result is missing.
expand 3 if electorate4=="BrFi"
bysort electorate4: egen seq=seq()
replace electorate4="Bonn" if seq==2
replace electorate4="Gort" if seq==3
drop seq
foreach v of varlist apathetic uninterestp unintereste {
    quietly summarize `v' if inlist(electorate4,"Grif","Bowm")
    replace `v'=r(mean) if electorate4=="Bonn"
    quietly summarize `v' if inlist(electorate4,"Burk","Calw","Mari")
    replace `v'=r(mean) if electorate4=="Gort"
}
* n for the two constructed rows is the total respondent count of their sources
quietly summarize n if inlist(electorate4,"Grif","Bowm")
replace n=r(sum) if electorate4=="Bonn"
quietly summarize n if inlist(electorate4,"Burk","Calw","Mari")
replace n=r(sum) if electorate4=="Gort"

sum
pwcorr
sort electorate4
save temp_aes_interest, replace
